# name : pdfplumber pdf to csv
# key : pdfplumber.pdf.to.csv
# contributor : Shuguang Sun
# --
# Collect the largest table of every page of a PDF into one DataFrame and
# export it as CSV and Excel.  The first table found supplies the header row;
# tables on later pages are treated as headerless continuations of it.
${2:pdf} = pdfplumber.open('${1:file.pdf}')
pdf_columns = []
page_frames = []
for page_index, page in enumerate($2.pages):
    print(page_index)
    temp_table = page.extract_table()
    # extract_table() gives None when no table is detected on the page.
    if not temp_table:
        continue
    if not pdf_columns:
        # Header cells may be None (empty cell) or contain line breaks.
        pdf_columns = [(temp or '').replace('\n', '') for temp in temp_table[0]]
        temp_table = temp_table[1:]
    temp_df = pd.DataFrame(temp_table, columns=pdf_columns)
    # Strip line breaks that pdfplumber keeps inside multi-line cells.
    temp_df = temp_df.replace(to_replace=r'\n', value='', regex=True)
    page_frames.append(temp_df)

# Concatenate once (linear instead of re-copying on every page); pd.concat
# raises on an empty list, so fall back to an empty frame.
total_pd = pd.concat(page_frames, ignore_index=True) if page_frames else pd.DataFrame()

# save to csv
total_pd.to_csv('${3:file}.csv', header=True, index=False)
# save to excel
total_pd.to_excel('$3.xlsx', header=True, index=False)